From 42807626761e52746fd88b39c76223bc85e49e50 Mon Sep 17 00:00:00 2001
From: Timothy Pearson <tpearson@raptorengineeringinc.com>
Date: Thu, 25 Jun 2015 18:08:53 -0500
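Subject: [PATCH 074/143] northbridge/amd/amdmct/mct_ddr3: Attempt to recover
 from phy training errors

Make the hardware write levelling phase functions return a status
byte instead of void. AgesaHwWlPhase2 returns nonzero whenever it
has to override a phy delay value that differs too much from the
seed value; WriteLevelization_HW then repeats the affected pass, up
to 8 attempts in total, and logs a message if faults persist.

The fault threshold is relaxed from 0x20 to 0x30 on the first pass,
and the override message is raised from BIOS_DEBUG to BIOS_INFO.
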
Change-Id: Ia2c3022534c9ad44714eef6e118869f054bd9f6b
Signed-off-by: Timothy Pearson <tpearson@raptorengineeringinc.com>
---
 src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c  |   68 +++++++++++++++++++------
 src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c |   36 ++++++++++---
 2 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
index 5e81808..539cb0d 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mcthwl.c
@@ -18,11 +18,11 @@
  * Foundation, Inc.
  */
 
-static void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat,
+static uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
-static void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat,
+static uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
-static void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat,
+static uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct, u8 dimm, u8 pass);
 static void EnableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
 static void DisableZQcalibration(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat);
@@ -100,11 +100,12 @@ static void DisableAutoRefresh_D(struct MCTStatStruc *pMCTstat,
 }
 
 
-static void PhyWLPass1(struct MCTStatStruc *pMCTstat,
+static uint8_t PhyWLPass1(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct)
 {
 	u8 dimm;
 	u16 DIMMValid;
+	uint8_t status = 0;
 	void *DCTPtr;
 
 	dct &= 1;
@@ -121,19 +122,22 @@ static void PhyWLPass1(struct MCTStatStruc *pMCTstat,
 		PrepareC_DCT(pMCTstat, pDCTstat, dct);
 		for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
 			if (DIMMValid & (1 << (dimm << 1))) {
-				AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass);
-				AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass);
-				AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass);
+				status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, FirstPass);
+				status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, FirstPass);
+				status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, FirstPass);
 			}
 		}
 	}
+
+	return status;
 }
 
-static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
+static uint8_t PhyWLPass2(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, u8 dct)
 {
 	u8 dimm;
 	u16 DIMMValid;
+	uint8_t status = 0;
 	void *DCTPtr;
 
 	dct &= 1;
@@ -163,12 +167,14 @@ static void PhyWLPass2(struct MCTStatStruc *pMCTstat,
 		DisableAutoRefresh_D(pMCTstat, pDCTstat);
 		for (dimm = 0; dimm < MAX_DIMMS_SUPPORTED; dimm ++) {
 			if (DIMMValid & (1 << (dimm << 1))) {
-				AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass);
-				AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass);
-				AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass);
+				status |= AgesaHwWlPhase1(pMCTstat, pDCTstat, dct, dimm, SecondPass);
+				status |= AgesaHwWlPhase2(pMCTstat, pDCTstat, dct, dimm, SecondPass);
+				status |= AgesaHwWlPhase3(pMCTstat, pDCTstat, dct, dimm, SecondPass);
 			}
 		}
 	}
+
+	return status;
 }
 
 static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq)
@@ -183,6 +189,8 @@ static uint16_t fam15h_next_highest_memclk_freq(uint16_t memclk_freq)
 static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 					struct DCTStatStruc *pDCTstat, uint8_t Pass)
 {
+	uint8_t status;
+	uint8_t timeout;
 	uint16_t final_target_freq;
 
 	pDCTstat->C_MCTPtr  = &(pDCTstat->s_C_MCTPtr);
@@ -201,8 +209,21 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 	}
 
 	if (Pass == FirstPass) {
-		PhyWLPass1(pMCTstat, pDCTstat, 0);
-		PhyWLPass1(pMCTstat, pDCTstat, 1);
+		timeout = 0;
+		do {
+			status = 0;
+			timeout++;
+			status |= PhyWLPass1(pMCTstat, pDCTstat, 0);
+			status |= PhyWLPass1(pMCTstat, pDCTstat, 1);
+			if (status)
+				printk(BIOS_INFO,
+					"%s: Retrying write levelling due to invalid value(s) detected in first phase\n",
+					__func__);
+		} while (status && (timeout < 8));
+		if (status)
+			printk(BIOS_INFO,
+				"%s: Uncorrectable invalid value(s) detected in first phase of write levelling\n",
+				__func__);
 	}
 
 	if (Pass == SecondPass) {
@@ -211,6 +232,7 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 			 * NOTE: BIOS must program both DCTs to the same frequency.
 			 * NOTE: Fam15h steps the frequency, Fam10h slams the frequency.
 			 */
+			uint8_t global_phy_training_status = 0;
 			final_target_freq = pDCTstat->TargetFreq;
 
 			while (pDCTstat->Speed != final_target_freq) {
@@ -219,12 +241,28 @@ static void WriteLevelization_HW(struct MCTStatStruc *pMCTstat,
 				else
 					pDCTstat->TargetFreq = final_target_freq;
 				SetTargetFreq(pMCTstat, pDCTstat);
-				PhyWLPass2(pMCTstat, pDCTstat, 0);
-				PhyWLPass2(pMCTstat, pDCTstat, 1);
+				timeout = 0;
+				do {
+					status = 0;
+					timeout++;
+					status |= PhyWLPass2(pMCTstat, pDCTstat, 0);
+					status |= PhyWLPass2(pMCTstat, pDCTstat, 1);
+					if (status)
+						printk(BIOS_INFO,
+							"%s: Retrying write levelling due to invalid value(s) detected in last phase\n",
+							__func__);
+				} while (status && (timeout < 8));
+				global_phy_training_status |= status;
 			}
 
 			pDCTstat->TargetFreq = final_target_freq;
 
+			if (global_phy_training_status)
+				printk(BIOS_WARNING,
+					"%s: Uncorrectable invalid value(s) detected in second phase of write levelling; "
+					"continuing but system may be unstable!\n",
+					__func__);
+
 			uint8_t dct;
 			for (dct = 0; dct < 2; dct++) {
 				sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
diff --git a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
index 0e626fa..403c87c 100644
--- a/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
+++ b/src/northbridge/amd/amdmct/mct_ddr3/mhwlc_d.c
@@ -54,7 +54,7 @@ static int32_t abs(int32_t val) {
  */
 
 /*-----------------------------------------------------------------------------
- * void AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData,
+ * uint8_t AgesaHwWlPhase1(SPDStruct *SPDData,MCTStruct *MCTData, DCTStruct *DCTData,
  *                  u8 Dimm, u8 Pass)
  *
  *  Description:
@@ -71,7 +71,7 @@ static int32_t abs(int32_t val) {
  *       OUT
  *-----------------------------------------------------------------------------
  */
-void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+uint8_t AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
 		u8 dct, u8 dimm, u8 pass)
 {
 	u8 ByteLane;
@@ -174,12 +174,15 @@ void AgesaHwWlPhase1(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
 	}
 
 	pDCTData->WLCriticalGrossDelayPrevPass = 0x1f;
+
+	return 0;
 }
 
-void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+uint8_t AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
 		u8 dct, u8 dimm, u8 pass)
 {
 	u8 ByteLane;
+	uint8_t status = 0;
 	sDCTStruct *pDCTData = pDCTstat->C_DCTPtr[dct];
 
 	if (is_fam15h()) {
@@ -206,19 +209,38 @@ void AgesaHwWlPhase2(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
 
 		/* Compensate for occasional noise/instability causing sporadic training failure */
 		for (ByteLane = 0; ByteLane < MAX_BYTE_LANES; ByteLane++) {
+			uint8_t faulty_value_detected = 0;
 			uint16_t total_delay_seed = ((pDCTData->WLSeedGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLSeedFineDelay[index+ByteLane] & 0x1f);
 			uint16_t total_delay_phy = ((pDCTData->WLGrossDelay[index+ByteLane] & 0x1f) << 5) | (pDCTData->WLFineDelay[index+ByteLane] & 0x1f);
-			if (abs(total_delay_phy - total_delay_seed) > 0x20) {
-				printk(BIOS_DEBUG, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__,
+			if (pass == FirstPass) {
+				/* Allow a somewhat higher step threshold on the first pass.
+				 * For the most part, as long as the phy isn't stepping
+				 * several clocks at once the values are probably valid.
+				 */
+				if (abs(total_delay_phy - total_delay_seed) > 0x30)
+					faulty_value_detected = 1;
+			} else {
+				/* Stepping memory clocks between adjacent allowed frequencies
+				 * should not yield large phy value differences...
+				 */
+
+				if (abs(total_delay_phy - total_delay_seed) > 0x20)
+					faulty_value_detected = 1;
+			}
+			if (faulty_value_detected) {
+				printk(BIOS_INFO, "%s: overriding faulty phy value (seed: %04x phy: %04x step: %04x)\n", __func__,
 					total_delay_seed, total_delay_phy, abs(total_delay_phy - total_delay_seed));
 				pDCTData->WLGrossDelay[index+ByteLane] = pDCTData->WLSeedGrossDelay[index+ByteLane];
 				pDCTData->WLFineDelay[index+ByteLane] = pDCTData->WLSeedFineDelay[index+ByteLane];
+				status = 1;
 			}
 		}
 	}
+
+	return status;
 }
 
-void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
+uint8_t AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTstat,
 		u8 dct, u8 dimm, u8 pass)
 {
 	u8 ByteLane;
@@ -285,6 +307,8 @@ void AgesaHwWlPhase3(struct MCTStatStruc *pMCTstat, struct DCTStatStruc *pDCTsta
 	 * to the normal operating termination:
 	 */
 	prepareDimms(pMCTstat, pDCTstat, dct, dimm, FALSE);
+
+	return 0;
 }
 
 /*----------------------------------------------------------------------------
-- 
1.7.9.5

